from sklearn.decomposition import PCA
from utils.mlmodel_util import preprocess
from utils.mlmodel_util import get_model_info_sklearn
from utils.model_util import load_model
from utils.format_util import dup_name_handler
import numpy as np


def predict(dataframe, para_list, output):
    """Transform ``dataframe`` with a stored model and append the result columns.

    The model identified by ``para_list["model_id"]`` is looked up and loaded,
    the input is run through ``preprocess`` with the feature list stored for
    that model, and every column of the transformed matrix is written back
    onto ``dataframe`` as a new column.

    Args:
        dataframe: Input table. It is modified in place (one column is added
            per transformed component) and is also the return value.
        para_list: Parameter mapping. This function reads ``"model_id"``,
            ``"feature_col"`` and ``"label_col"``; it is also passed whole to
            ``preprocess``.
        output: Nested mapping; the generated column names are stored at
            ``output["result"]["output_params"]["output_cols"]``.

    Returns:
        The same ``dataframe`` object with the component columns added.
    """
    # --------------------load the model-----------------
    # The second element returned by the lookup is not needed here.
    model_save_path, _, feature_list = get_model_info_sklearn(para_list["model_id"])
    mlmodel = load_model(model_save_path)

    # --------------------make predictions----------------
    X = preprocess(dataframe, para_list, feature_list)
    pred = mlmodel.transform(X)

    output_cols = []
    for i in range(pred.shape[1]):
        # Built fresh on every iteration so the helper always receives its own
        # list (dup_name_handler is opaque from here and may modify it).
        reserved_names = para_list["feature_col"] + [para_list["label_col"]]
        # Components are numbered from 1; the helper resolves clashes with the
        # feature and label column names.
        output_col = dup_name_handler("_pc_{}_".format(i + 1), reserved_names)
        output_cols.append(output_col)
        dataframe[output_col] = pred[:, i]
    output["result"]["output_params"]["output_cols"] = output_cols

    return dataframe


def train(dataframe, para_list, record):
    """Fit a PCA model on ``dataframe`` and append the projected components.

    Args:
        dataframe: Input table. It is modified in place: one ``_pc_<n>_``
            column (numbered from 1) is added per fitted component.
        para_list: Parameter mapping. This function reads ``"k"`` (passed to
            ``PCA`` as its first argument, i.e. ``n_components``) and
            ``"feature_col"``; it is also passed whole to ``preprocess``.
        record: Mapping that receives the training summary under
            ``record["other_info"]`` with the keys ``feature_list``,
            ``feature_col``, ``ev_chart``, ``pcMatrix`` and
            ``total_explained_variance``.

    Returns:
        A ``(mlmodel, dataframe)`` tuple: the fitted PCA estimator and the
        same ``dataframe`` object with the component columns added.
    """
    # --------------------prepare data------------------
    # Presumably filled in place by preprocess with the feature names it
    # actually used -- confirm against utils.mlmodel_util.preprocess.
    feature_list = []
    X = preprocess(dataframe, para_list, feature_list)

    # ---------------------model fit---------------------
    mlmodel = PCA(para_list["k"]).fit(X)
    pred = mlmodel.transform(X)

    # ----------------------record info-------------------
    explained_variance = mlmodel.explained_variance_.tolist()
    # Sum over the retained components only, so the ratios below are shares of
    # the variance kept by the model, not of the full data set's variance.
    total = np.sum(explained_variance)

    # One row per fitted component: [index, share, cumulative share].
    # Iterating over the fitted values (rather than range(k)) keeps this
    # working when "k" is not an integer count, e.g. a variance fraction.
    ev_chart = []
    cumulative = 0
    for component_no, variance in enumerate(explained_variance, start=1):
        cumulative = cumulative + variance
        ev_chart.append([component_no, variance / total, cumulative / total])

    record["other_info"] = {
        "feature_list": feature_list,
        "feature_col": para_list["feature_col"],
        "ev_chart": ev_chart,
        "pcMatrix": mlmodel.components_.tolist(),
        "total_explained_variance": total
    }

    # NOTE(review): unlike predict(), these names are not passed through
    # dup_name_handler, so an existing column with the same name would be
    # replaced -- confirm this is intended.
    for i in range(pred.shape[1]):
        dataframe["_pc_{}_".format(i + 1)] = pred[:, i]

    return mlmodel, dataframe
